Language/OS - Multiplatform Resource Library

home *** CD-ROM | disk | FTP | other *** search

/ Language/OS - Multiplatform Resource Library / LANGUAGE OS.iso / dsp / fft / fft_eyal.lha / fft_eyal / fftg.c < prev next >

Wrap

C/C++ Source or Header | 1991-08-31 | 12.0 KB | 362 lines

/* ------------------------------ fftg.c ------------------------------------ */ /* */ /* Author: Eyal Lebedinsky */ /* Date: May 1990 */ /* Version: 9 June 1991 */ /* */ /* Program generator for integer DFFT. This program was developed for a very */ /* specific need and no attempt was made to make it general. Still, the basic */ /* algorithm is a standard one from the literature. For maximum performance */ /* of the integer calculation a special mechanism was employed to track shift */ /* operations that could be delayed. Coefficients are kept as 16 bits with 1 */ /* sign bit, 1 integer bit and 14 fractional bits. So a typicall multiply */ /* will do 16bit*16bits=32bits, then shift down one bit and use the HIGH 16 */ /* bits as the result. Before adding two numbers they must be scaled down one */ /* bit to avoid overflow. Adding more numbers calls for more scaling. The */ /* delayed scaling allows the intermediate array to hold numbers that are */ /* scaled to a varying degree until a clash occurs. Well, this space is too */ /* short to elaborate any further and if you did similar kind of work then */ /* you should know what I am talking about. */ /* */ /* usage: fftgc File Size EpName */ /* File - output file name. */ /* Size - sample order: 8 for 256 points, 10 for 1024... */ /* EpName - name of the generated function ('_' added) */ /* Now assemble the output file and then use as: */ /* */ /* #define M 8 [or whatever size] */ /* #define N (1 << M) */ /* short x[N]; [input/output array] */ /* short qf[(N/2)+1]; [power spectrum] */ /* extern void far fft () */ /* */ /* [then call it as: fft();] */ /* */ /* This is a modified version of Sorensen et al, IEEE ASSP-35 no 6. */ /* */ /* - uses 3+3 complex mult. */ /* - no pre scrambling. descrambling done when power spectrum calculated. */ /* - moved to 0-origin as proper for c. */ /* - trig values in table. */ /* - the case for sin==cos treated separately. */ /* - uses delayed scaling down. */ /* - does length 256 fft with 642 mults. */ /* */ /* */ /* This program is released into the public domain. */ /* */ /*----------------------------------------------------------------------------*/ #include <stdio.h> #include <math.h> #define MAXPOINTS 1024 #define SC15 32768 /* scaling factor 2^15 */ /*** About SC15: Fractions are kept as 16 bit quantities with one sign bit, no integer bits and 15 fractional bits. SC15 is used to convert a floating point number (in the range -1 .. 1) into such an integer. There is no integer part because all fractions are really kept already scaled down by a factor of 2. This is a result of the fact that all fractions are constants (sin() and cos() values) and whenever we multiply a point by a fraction we process to scale the result down by one bit anywat - so the reduce constants save us some processing time. ***/ static short mm[MAXPOINTS+1]; /* digit reverse indices */ /*** About mm[]: The values in the sample points array x[] are not scramled at any stage. Instead, the array mm[] indicated where a point should be if we DID do the scrambling. All indices into x[] use mm[] and this is how the power spectrum extracts the data. This means that you CANNOT use the x[] array after calling fft() because the points are not in place. If you want to change the output of fft() then just modify the last part of this program (the power spectrum logic) to calculate whatever you need. ***/ static int cntx[9]; /* stats counters */ /*** About cntx[]: This array gathers statistics: 0 memory loads 1 memory stores 2 register shifts 3 adds 4 mults 5 memory shifts due to smad() correction ***/ static char *file_name, *ep_name; extern void start_fft (), end_fft (); extern void fft1 (), /* scaling */ fft2 (), fft3 (), fft4 (), fft5 (), /* SRFFT */ fft7 (), fft8 (); /* power spectrum */ /*** about ad[], mad and smad(): Because we do integer arithmetic, adding two numbers can overflow the integer magnitude. For this reason the integers are scaled down (by one bit) before the addition. Sometimes we add more that two numbers and it is necessary to scale further. The FFT algorithm does not use all of the points in the same way in one pass, which means that some data needs to be scaled and some does not need to. Actualy, in the first pass some points are not used at all. This means that SOME of the points would have been scaled down. If we add (in a later stage) such a point with an un-scaled point the result is invalid. For this purpose the array ad[] keeps track of how far each point was scaled, smad() checks that a point is properly scaled and mad is the needed scaling at this stage. As the FFT progresses, some points are kept at a different scaling untill they are used in an expression with other points. If smad() finds a point out of adjustment the it issues a scaling instruction using the fft1() function. The program was adjusted to optimize the scaling (the various fft2() ... fft5() have internal scaling rules) and you may notice that smad() hardly ever needs to do any correction (usually there is only one final shift). This makes the whole ad[] almost redundant but it was left in for future use. If your smaple points are less that 16 bits then you still need to keep scaling you data. You need to have enough spare bits to avoid the scaling, and the number depends on the size of the sample - the larger the sample the more stages the FFT has and the more spare bits you need. ***/ static short ad[MAXPOINTS+1]; /* how far each point is adjusted */ static int mad; /* maximum adjust at this level */ static void smad (i, j) /* set proper adjust */ int i, j; { while (ad[i] < (mad-j)) { fprintf (stderr, "x[%u] >>= %d (%d-%d)\n", i, 1, mad, j); fft1 (mm[i]); ++ad[i]; cntx[0] += 1; cntx[1] += 1; cntx[2] += 1; cntx[5] += 1; } } main (argc, argv) int argc; char *argv[]; { int n, m, j, i, k, is, id, n1, n2, n4, n8, ind, i0, i1, i2, i3, i4, i5, i6, i7, i8, cc1, ss1, cc3, ss3; float e, a, a3; /* ---------- Handle arguments ------------------------------------------- */ if (argc < 4) { printf ("usage: fftg File PowerOf2 EpName [stderrFile]\n"); exit (0); } file_name = argv[1]; sscanf (argv[2], "%u", &m); ep_name = argv[3]; if (argc > 4) freopen (argv[4], "wt", stderr); /* ---------- Initialise ------------------------------------------------ */ start_fft (file_name, ep_name); n = 1 << m; for (i = 0; i < 9; ++i) cntx[i] = 0; mad = 0; for (i = 1; i <= n; ++i) ad[i] = 0; /* ---------- Digit reverse counter -------------------------------------- */ for (i = 1; i <= n; ++i) { mm[i] = i-1; /* '-1' for shifting to 0-origin */ } j = 1; n1 = n - 1; for (i = 1; i <= n1; ++i) { if (i < j) { mm[i] = j-1; /* '-1' for shifting to 0-origin */ mm[j] = i-1; /* '-1' for shifting to 0-origin */ } for (k = n / 2; k < j; k /= 2) j -= k; j += k; } /* ---------- Length two butterflies ------------------------------------- */ for (is = 1, id = 4; is < n; is = 2 * id - 1, id = 4 * id) { for (i0 = is; i0 <= n; i0 += id) { i1 = i0 + 1; smad (i0, 0); smad (i1, 0); fft2 (mm[i0], mm[i1]); ++ad[i0]; ++ad[i1]; cntx[0] += 2; cntx[1] += 2; cntx[2] += 2; cntx[3] += 2; } } /* ---------- L shaped butterflies --------------------------------------- */ n2 = 2; for (k = 2; k <= m; ++k) { ++mad; n2 = n2 * 2; n4 = n2 / 4; n8 = n2 / 8; e = 6.2831853071719586 / n2; /* 2 * pi */ cc1 = (SC15/2) * 0.7071067811865475; /* sqrt (0.5) */ for (is = 0, id = n2 * 2; is < n; is = 2 * id - n2, id *= 4) { for (i = is; i <= n - 1; i += id) { i1 = i + 1; i2 = i1 + n4; i3 = i2 + n4; i4 = i3 + n4; smad (i1, 0); smad (i3, 1); smad (i4, 1); fft3 (mm[i1], mm[i3], mm[i4]); ad[i1] += 1; ad[i3] += 2; ad[i4] += 2; cntx[0] += 3; cntx[1] += 3; cntx[2] += 5; cntx[3] += 4; if (n4 != 1) { i1 = i1 + n8; i2 = i2 + n8; i3 = i3 + n8; i4 = i4 + n8; smad (i1, 1); smad (i2, 0); smad (i3, 1); smad (i4, 1); fft4 (mm[i1], mm[i2], mm[i3], mm[i4], cc1); ad[i1] += 2; ad[i2] += 1; ad[i3] += 2; ad[i4] += 2; cntx[0] += 4; cntx[1] += 4; cntx[2] += 2; cntx[3] += 6; cntx[4] += 2; } } } for (j = 2, a = e; j <= n8; ++j, a += e) { a3 = 3 * a; cc1 = (SC15/2) * cos (a); ss1 = (SC15/2) * sin (a); cc3 = (SC15/2) * cos (a3); ss3 = (SC15/2) * sin (a3); for (is = 0, id = n2 * 2; is < n; is = 2 * id - n2, id *= 4) { for (i = is; i <= n - 1; i += id) { i1 = i + j; i2 = i1 + n4; i3 = i2 + n4; i4 = i3 + n4; i5 = i + n4 - j + 2; i6 = i5 + n4; i7 = i6 + n4; i8 = i7 + n4; smad (i1, 0); smad (i2, 0); smad (i3, 2); smad (i4, 2); smad (i5, 0); smad (i6, 0); smad (i7, 1); smad (i8, 1); if (ind = (ad[i3] < (mad-1))) { ++ad[i3]; ++ad[i4]; cntx[2] += 1; } fft5 (mm[i1], mm[i2], mm[i3], mm[i4], mm[i5], mm[i6], mm[i7], mm[i8], ss1-cc1, -ss1-cc1, cc1, ss3-cc3, -ss3-cc3, cc3, ind); ad[i1] += 1; ad[i2] += 1; ad[i3] += 2; ad[i4] += 2; ad[i5] += 1; ad[i6] += 1; ad[i7] += 2; ad[i8] += 2; cntx[0] += 8; cntx[1] += 8; cntx[2] += 4; cntx[3] += 18; cntx[4] += 6; } } } } /* ---------- force final adjustment ------------------------------------- */ ++mad; for (i = 1; i <= n; ++i) smad (i, 0); /* ---------- Power spectrum (no sqrt) ----------------------------------- */ fft7 (0, mm[1]); for (i = 1; i < n/2; ++i) fft8 (i, mm[i+1], mm[n-i+1]); fft7 ((n/2), mm[n/2+1]); /* ---------- end and show stats ----------------------------------------- */ end_fft (file_name, ep_name); for (i = 0; i < 9; ++i) fprintf (stderr, " %5u", i); fprintf (stderr, "\n Load Store Shift Add Mult Adj\n"); for (i = 0; i < 9; ++i) fprintf (stderr, " %5u", cntx[i]); fprintf (stderr, "\n"); exit (0); }